
# library(pacman)
# p_load(dplyr, stringr, tidyr, wbstats, DBI, RSQLite, countrycode, modelsummary, fixest, kableExtra) 
library(dplyr)
library(stringr)
library(tidyr)
library(wbstats)
library(DBI)
library(RSQLite)
library(countrycode)
library(modelsummary)
library(fixest)
library(kableExtra)

options(modelsummary_factory_default = 'kableExtra')

##### PREPPING DATA #####

GDP <- wb_data("NY.GDP.MKTP.CD", start_date = 1990, end_date = 2023) %>% rename(year = date) %>%
  mutate(country = ifelse(country == "Turkiye", "Turkey", country)) %>%
  mutate(country_code = countrycode(sourcevar = country,
                                    origin = "country.name",
                                    destination = "iso3c")) 

EXP <- wb_data("NE.EXP.GNFS.CD", start_date = 1990, end_date = 2023) %>% rename(year = date)  %>%
  mutate(country = ifelse(country == "Turkiye", "Turkey", country)) %>%
  mutate(country_code = countrycode(sourcevar = country,
                                    origin = "country.name",
                                    destination = "iso3c")) 

POP <- wb_data("SP.POP.TOTL", start_date = 1990, end_date = 2023) %>% rename(year = date)  %>%
  mutate(country = ifelse(country == "Turkiye", "Turkey", country)) %>%
  mutate(country_code = countrycode(sourcevar = country,
                                    origin = "country.name",
                                    destination = "iso3c")) 

IND <- wb_data("NV.IND.TOTL.ZS", start_date = 1990, end_date = 2023) %>% rename(year = date)  %>%
  mutate(country = ifelse(country == "Turkiye", "Turkey", country)) %>%
  mutate(country_code = countrycode(sourcevar = country,
                                    origin = "country.name",
                                    destination = "iso3c")) 

ICT <- wb_data("BX.GSR.CCIS.ZS", start_date = 1990, end_date = 2023) %>% rename(year = date)  %>%
  mutate(country = ifelse(country == "Turkiye", "Turkey", country)) %>%
  mutate(country_code = countrycode(sourcevar = country,
                                    origin = "country.name",
                                    destination = "iso3c")) 

RND <- wb_data("GB.XPD.RSDV.GD.ZS", start_date = 1990, end_date = 2023) %>% rename(year = date)  %>%
  mutate(country = ifelse(country == "Turkiye", "Turkey", country)) %>%
  mutate(country_code = countrycode(sourcevar = country,
                                    origin = "country.name",
                                    destination = "iso3c")) 

con <- dbConnect(RSQLite::SQLite(), "iso_standards.sqlite")

country_certifications <- dbReadTable(con, "country_certifications") %>%
  mutate(country = ifelse(country == "Russion Federation", "Russian Federation", 
                          ifelse(country == "Ethopia", "Ethiopia",
                                 ifelse(country == "Albana", "Albania",
                                        ifelse(country == "Taiwan, Province of China", "Taiwan",
                                               ifelse(country == "Taipei, Chinese", "Taiwan",
                                                      ifelse(country == "Cocos (Keeling) Islands, Kosovo", "Kosovo",
                                                             ifelse(country == "Netherlands Antilles (NL)", "Netherlands Antilles",
                                                                    ifelse(country == "United Kingdom of Great Britain and Northern Ireland", "United Kingdom",
                                                                           ifelse(country == "Korea, Republic of", "South Korea",
                                                                                  ifelse(country == "United States of America", "United States",
                                                                                         ifelse(country == "Hong Kong, China", "Hong Kong",
                                                                                                ifelse(country == "Viet Nam", "Vietnam",
                                                                                                       country)))))))))))))

dbDisconnect(con)

cert_agg <- country_certifications %>%
  mutate(continent = countrycode(sourcevar = country,
                                 origin = "country.name",
                                 destination = "un.region.name")) %>%
  mutate(iso_name = str_to_upper(iso),
         iso_name = str_replace_all(iso_name, "_", " "),
         iso_name = str_squish(iso_name),
         iso_name = ifelse(iso_name == "ISO IEC 27001", "ISO 27001", 
                           ifelse(iso_name == "ISO IEC 20000-1", "ISO 20000 1", iso_name))) %>%
  distinct(country, year, iso, iso_name, .keep_all = TRUE) %>%
  mutate(certificates = replace_na(certificates, 0))

certifications <- cert_agg %>%
  mutate(year = as.numeric(year)) %>%
  select(-iso_name) %>%
  filter(iso %in% c("iso_9001", "iso_14001", "iso_iec_27001")) %>%
  spread(iso, certificates) %>%
  group_by(country) %>%
  arrange(desc(-year)) %>%
  mutate(iso_14001_prev = cumsum(replace_na(iso_14001, 0)),
         iso_9001_prev = cumsum(replace_na(iso_9001, 0)),
         iso_iec_27001_prev = cumsum(replace_na(iso_iec_27001, 0))) %>%
  ungroup() %>%
  mutate(country_code = countrycode(sourcevar = country,
                                    origin = "country.name",
                                    destination = "iso3c")) %>%
  left_join(GDP %>% select(country_code, year, NY.GDP.MKTP.CD) %>% unique(), by = join_by(country_code, year)) %>%
  mutate(NY.GDP.MKTP.CD = ifelse(country == "Taiwan" & year == 1993, 236300000000,
                                 ifelse(country == "Taiwan" & year == 1994, 256200000000,
                                        ifelse(country == "Taiwan" & year == 1995, 279100000000,
                                               ifelse(country == "Taiwan" & year == 1996, 292500000000,
                                                      ifelse(country == "Taiwan" & year == 1997, 303300000000,
                                                             ifelse(country == "Taiwan" & year == 1998, 280000000000,
                                                                    ifelse(country == "Taiwan" & year == 1999, 303800000000,
                                                                           NY.GDP.MKTP.CD)))))))) %>%
  left_join(EXP %>% select(country_code, year, NE.EXP.GNFS.CD) %>% unique(), by = join_by(country_code, year)) %>%
  left_join(POP %>% select(country_code, year, SP.POP.TOTL) %>% unique(), by = join_by(country_code, year)) %>%
  left_join(IND %>% select(country_code, year, NV.IND.TOTL.ZS) %>% unique(), by = join_by(country_code, year)) %>%
  left_join(ICT %>% select(country_code, year, BX.GSR.CCIS.ZS) %>% unique(), by = join_by(country_code, year)) %>%
  left_join(RND %>% select(country_code, year, GB.XPD.RSDV.GD.ZS) %>% unique(), by = join_by(country_code, year)) %>%
  group_by(country_code) %>%
  ungroup() 

cert_data <- certifications %>% 
  mutate(gdp_bill = NY.GDP.MKTP.CD/100000000) %>%
  mutate(I9GDP = iso_9001/gdp_bill) %>%
  mutate(I14GDP = iso_14001/gdp_bill) %>%
  mutate(I27GDP = iso_iec_27001/gdp_bill) %>%
  mutate(I9GDP_cum = iso_9001_prev/gdp_bill,
         I14GDP_cum = iso_14001_prev/gdp_bill,
         I27GDP_cum = iso_iec_27001_prev/gdp_bill) %>%
  mutate(GDPPOP = gdp_bill/SP.POP.TOTL) %>%
  mutate(EXPGDP = NE.EXP.GNFS.CD/NY.GDP.MKTP.CD) %>%
  rename(IND = NV.IND.TOTL.ZS,
         RND = GB.XPD.RSDV.GD.ZS,
         ICT = BX.GSR.CCIS.ZS)

##### ANALYSIS #####

mod1 <- feols(I14GDP ~ lag(I9GDP, 1) + GDPPOP + log(EXPGDP) + IND |
                country + year,
              cluster = "country+year", data = cert_data %>% filter(year %in% 1999:2022))

mod2 <- feols(I27GDP ~ lag(I9GDP, 1) + GDPPOP + log(EXPGDP) + IND | 
                country + year,
              cluster = "country+year", data = cert_data %>% filter(year %in% 2006:2022))

mod3 <- feols(I27GDP ~ lag(I14GDP, 1) + lag(I9GDP, 1) + GDPPOP + log(EXPGDP) + IND |
                country + year,
              cluster = "country+year", data = cert_data %>% filter(year %in% 2006:2022))

cm <- c("lag(I9GDP, 1)" = "Certification in ISO 9001 (1 year lag)", 
        "lag(I14GDP, 1)" = "Certification in ISO 14001 (1 year lag)",
        "ICT" = "ICT % of service exports",
        "RND" = "Research and development expenditure (% of GDP)",
        "GDPPOP" = "GDP per capita",
        "log(EXPGDP)" = "Exports per GDP (ln)",
        "IND" = "Industry value added (% of GDP)")

models <- list(mod1, mod2, mod3)
names(models) <- c("ISO 14001", "ISO 27001", "ISO 27001")

rows <- tibble::tribble(~start, ~mod1, ~mod2, ~mod3,
                        "Fixed effects", "Yes", "Yes", "Yes",
                        "Time series", "1999-2022", "2006-2022", "2006-2022 ")

tabsummary <- modelsummary(models,
                           fmt = 3,
                           coef_map = cm,
                           #output = "latex",
                           stars = TRUE,
                           statistic = c("{std.error}"),
                           gof_omit = 'AIC|BIC|Within|Std.Errors|R2|FE',
                           notes = c("Fixed effects: Country and year.",
                                     "Clustered standard errors by country and year.",
                                     "Model: OLS"),
                           add_rows = rows,
                           coef_omit = c("Intercept"))


tabsummary %>%
  add_header_above(c(" " = 1, "Dependent variable: Certifications" = 3)) %>%
  kable_styling(font_size = 10, full_width = FALSE) 


## CUMULATIVE

mod1 <- feols(I14GDP ~ lag(I9GDP_cum, 1) + GDPPOP + log(EXPGDP) + IND |
                country + year,
              cluster = "country+year", data = cert_data %>% filter(year %in% 1999:2022))

mod2 <- feols(I27GDP_cum ~ lag(I9GDP_cum, 1) + GDPPOP + log(EXPGDP) + IND | 
                country + year,
              cluster = "country+year", data = cert_data %>% filter(year %in% 2006:2022))

mod3 <- feols(I27GDP_cum ~ lag(I14GDP_cum, 1) + lag(I9GDP_cum, 1) + GDPPOP + log(EXPGDP) + IND |
                country + year,
              cluster = "country+year", data = cert_data %>% filter(year %in% 2006:2022))

cm <- c("lag(I9GDP_cum, 1)" = "Cumulative certification in ISO 9001 (1 year lag)", 
        "lag(I14GDP_cum, 1)" = "Cumulative certification in ISO 14001 (1 year lag)",
        "ICT" = "ICT % of service exports",
        "RND" = "Research and development expenditure (% of GDP)",
        "GDPPOP" = "GDP per capita",
        "log(EXPGDP)" = "Exports per GDP (ln)",
        "IND" = "Industry value added (% of GDP)")

models <- list(mod1, mod2, mod3)
names(models) <- c("ISO 14001", "ISO 27001", "ISO 27001")

rows <- tibble::tribble(~start, ~mod1, ~mod2, ~mod3,
                        "Fixed effects", "Yes", "Yes", "Yes",
                        "Time series", "1999-2022", "2006-2022", "2006-2022 ")

tabsummary <- modelsummary(models,
                           fmt = 3,
                           coef_map = cm,
                           #output = "latex",
                           stars = TRUE,
                           statistic = c("{std.error}"),
                           gof_omit = 'AIC|BIC|Within|Std.Errors|R2|FE',
                           notes = c("Fixed effects: Country and year.",
                                     "Clustered standard errors by country and year."),
                           add_rows = rows,
                           coef_omit = c("Intercept"))


tabsummary %>%
  add_header_above(c(" " = 1, "Dependent variable: Certifications" = 3)) %>%
  kable_styling(font_size = 10, full_width = FALSE) 


